Modelo Poisson 2

Importanto as bibliotecas necessárias

require(tidyverse)
require(plotly)
require(knitr)
require(rstan)
require(reshape2)
require(bayesplot)

Gerando os jogos

num_teams <- 20

# Dicionario para relacionar o id de um time com seu nome
team_names <- c("Dragões do Sertão", "Atlético Rio Vermelho", "Borborema",
                "Guerreiros da Mata", "Cacique", "Aurora Litorânea",
                "Gávea Azul", "Mandacaru United", "Capibaribe", 
                "Índios Tupiniquins", "Atlético Taquara Verde", "Seriema",
                "Blumenau City", "Iguaçu", "Atlético Palmares",
                "Serra Dourada", "Sambaqui", "Pampa", 
                "Riacho do Meio", "Sport Club Xingu")


games <- data.frame(
  h = rep(1:num_teams, each = num_teams),
  a = rep(1:num_teams, times = num_teams)
)

games <- games[games$h != games$a, ]

Definindo os parâmetros dos dados gerados:

set.seed(28)

mu_att_h <- 0
sd_att_h <- 0.25
mu_att_a <- 0
sd_att_a <- 0.25

mu_def_h <- 0
sd_def_h <- 0.25
mu_def_a <- 0
sd_def_a <- 0.25

beta_0 <- rnorm(1, 0, 0.1)
home_effect <- rnorm(1, 0.2, 0.1)
att_h_effects <- rnorm(num_teams, mu_att_h, sd_att_h)
att_a_effects <- rnorm(num_teams, mu_att_a, sd_att_a)

def_h_effects <- rnorm(num_teams, mu_def_h, sd_def_h)
def_a_effects <- rnorm(num_teams, mu_def_a, sd_def_a)
  • \(\beta_{0}\) = -0.1902157
  • \(home\) = 0.1935705
  • \(\mu_{Hatt}\) = 0
  • \(\sigma_{Hatt}\) = 0.25
  • \(\mu_{Aatt}\) = 0
  • \(\sigma_{Aatt}\) = 0.25
  • \(\mu_{Hdef}\) = 0
  • \(\sigma_{Hdef}\) = 0.25
  • \(\mu_{Adef}\) = 0
  • \(\sigma_{Adef}\) = 0.25

Esses foram os efeitos de ataque e defesa gerados:

Gerando os resultados dos jogos

set.seed(40)

simulate_games <- function(games){
  num_games <- length(games$h)
  home_team <- games$h
  away_team <- games$a
  
  theta_1 <- beta_0 + home_effect + att_h_effects[home_team] + def_a_effects[away_team]
  theta_2 <- beta_0 + att_a_effects[away_team] + def_h_effects[home_team]
  
  y1 <- rpois(num_games, exp(theta_1))
  y2 <- rpois(num_games, exp(theta_2))
  
  games$y1 <- y1
  games$y2 <- y2  
  
  assign("games", games, envir = .GlobalEnv)
}

simulate_games(games)

Análise dos resultados gerados

Alguns resultados que nos dão uma visão geral dos dados são:

  • O time com o melhor ataque da competição foi o Iguaçu balançando as redes 61 vezes.
  • O time com o pior ataque da competição foi o Capibaribe marcando 18 gols.
  • O time com a melhor defesa da competição foi o Guerreiros da Mata tendo sofrido um total de 21 gols.
  • O time com a pior defesa da competição foi o Cacique tendo sofrido um total de 47 gols.
  • A maior goleada do campeonato foi a vitória do Atlético Taquara Verde por 6 a 0 em cima do Pampa
  • O Iguaçu foi o campeão com 79 pontos
  • O Capibaribe foi o lanterna com 38 pontos
  • O Blumenau City foi o primeiro time fora da zona, se salvando do rebaixamento com 43 pontos
Clique para ver a tabela do campeonato completa
Posicao Time Pontos Vitorias Empates Derrotas GM GS SG
1 Iguaçu 79 24 7 7 61 41 20
2 Índios Tupiniquins 66 19 9 10 50 36 14
3 Mandacaru United 60 17 9 12 41 34 7
4 Seriema 57 15 12 11 45 36 9
5 Atlético Palmares 56 15 11 12 40 21 19
6 Gávea Azul 54 13 15 10 33 28 5
7 Sambaqui 54 15 9 14 35 42 -7
8 Riacho do Meio 54 17 3 18 32 26 6
9 Atlético Rio Vermelho 53 14 11 13 35 36 -1
10 Aurora Litorânea 53 13 14 11 33 22 11
11 Guerreiros da Mata 51 14 9 15 34 21 13
12 Dragões do Sertão 50 12 14 12 28 26 2
13 Serra Dourada 49 13 10 15 31 33 -2
14 Borborema 47 11 14 13 22 29 -7
15 Cacique 46 12 10 16 42 47 -5
16 Blumenau City 43 11 10 17 27 24 3
17 Sport Club Xingu 42 10 12 16 37 38 -1
18 Atlético Taquara Verde 41 10 11 17 40 35 5
19 Pampa 39 8 15 15 29 27 2
20 Capibaribe 38 9 11 18 18 28 -10

Definindo os parâmetros das prioris

hyper_params <- list(
  beta_0_mu = 0,
  beta_0_sd = 1000,
  home_mu = 0,
  home_sd = 1000,
  
  att_h_mu = 0,
  sd_att_h_mu = 0,
  sd_att_h_sig = 2.5,
  
  att_a_mu = 0,
  sd_att_a_mu = 0,
  sd_att_a_sig = 2.5,
  
  def_h_mu = 0,
  sd_def_h_mu = 0,
  sd_def_h_sig = 2.5,
  
  def_a_mu = 0,
  sd_def_a_mu = 0,
  sd_def_a_sig = 2.5
)

Estimando os parâmetros com o STAN

data <- append(append(list(ngames = num_games, nteams = num_teams), as.list(games)), hyper_params)

model <- stan_model("./models/poisson_2.stan")
Running /usr/lib64/R/bin/R CMD SHLIB foo.c
using C compiler: ‘gcc (GCC) 13.2.1 20230801’
gcc -I"/usr/include/R/" -DNDEBUG   -I"/home/arthur/R/x86_64-pc-linux-gnu-library/4.3/Rcpp/include/"  -I"/home/arthur/R/x86_64-pc-linux-gnu-library/4.3/RcppEigen/include/"  -I"/home/arthur/R/x86_64-pc-linux-gnu-library/4.3/RcppEigen/include/unsupported"  -I"/home/arthur/R/x86_64-pc-linux-gnu-library/4.3/BH/include" -I"/home/arthur/R/x86_64-pc-linux-gnu-library/4.3/StanHeaders/include/src/"  -I"/home/arthur/R/x86_64-pc-linux-gnu-library/4.3/StanHeaders/include/"  -I"/home/arthur/R/x86_64-pc-linux-gnu-library/4.3/RcppParallel/include/"  -I"/home/arthur/R/x86_64-pc-linux-gnu-library/4.3/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -D_HAS_AUTO_PTR_ETC=0  -include '/home/arthur/R/x86_64-pc-linux-gnu-library/4.3/StanHeaders/include/stan/math/prim/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/usr/local/include    -fpic  -march=x86-64 -mtune=generic -O2 -pipe -fno-plt -fexceptions         -Wp,-D_FORTIFY_SOURCE=2 -Wformat -Werror=format-security         -fstack-clash-protection -fcf-protection -g -ffile-prefix-map=/build/r/src=/usr/src/debug/r -flto=auto -ffat-lto-objects  -c foo.c -o foo.o
In file included from /home/arthur/R/x86_64-pc-linux-gnu-library/4.3/RcppEigen/include/Eigen/Dense:1,
                 from /home/arthur/R/x86_64-pc-linux-gnu-library/4.3/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22,
                 from <command-line>:
/home/arthur/R/x86_64-pc-linux-gnu-library/4.3/RcppEigen/include/Eigen/Core:82:12: fatal error: new: No such file or directory
   82 |   #include <new>
      |            ^~~~~
compilation terminated.
make: *** [/usr/lib64/R/etc/Makeconf:191: foo.o] Error 1
iter <- 4000
fit <- sampling(model, data = data, iter = iter, chains = 2, cores = 2)
data {
  int<lower=1> ngames;       
  int<lower=1> nteams;       
  int h[ngames];             
  int a[ngames];             
  int y1[ngames];            
  int y2[ngames];
  real beta_0_mu; real<lower=0> beta_0_sd;
  real home_mu; real<lower=0> home_sd;
  
  real att_h_mu; real sd_att_h_mu; real sd_att_h_sig;
  real att_a_mu; real sd_att_a_mu; real sd_att_a_sig;
  
  real def_h_mu; real sd_def_h_mu; real sd_def_h_sig;
  real def_a_mu; real sd_def_a_mu; real sd_def_a_sig;
}

parameters {
  real beta_0;
  real home;                 
  vector[nteams] att_h;
  vector[nteams] att_a;
  vector[nteams] def_h;
  vector[nteams] def_a;  
  real<lower=0> sd_att_h;
  real<lower=0> sd_att_a;
  real<lower=0> sd_def_h;
  real<lower=0> sd_def_a;
}

model {
  for (g in 1:ngames) {
    y1[g] ~ poisson_log(beta_0 + home + att_h[h[g]] + def_a[a[g]]);
    y2[g] ~ poisson_log(beta_0 + att_a[a[g]] + def_h[h[g]]);
  }

  beta_0 ~ normal(beta_0_mu, beta_0_sd);
  home ~ normal(home_mu, home_sd);
  sd_att_h ~ cauchy(sd_att_h_mu, sd_att_h_sig);
  sd_att_a ~ cauchy(sd_att_a_mu, sd_att_a_sig);
  
  sd_def_h ~ cauchy(sd_def_h_mu, sd_def_h_sig);
  sd_def_a ~ cauchy(sd_def_a_mu, sd_def_a_sig);
  
  
  att_h ~ normal(att_h_mu, sd_att_h);
  att_a ~ normal(att_a_mu, sd_att_a);
  
  def_h ~ normal(def_h_mu, sd_def_h);
  def_a ~ normal(def_a_mu, sd_def_a);
}

generated quantities {
  int y1_pred[ngames];
  int y2_pred[ngames];
  vector[ngames] log_lik;

  for (g in 1:ngames) {
    y1_pred[g] = poisson_log_rng(beta_0 + home + att_h[h[g]] + def_a[a[g]]);
    y2_pred[g] = poisson_log_rng(beta_0 + att_a[a[g]] + def_h[h[g]]);
    
    log_lik[g] = poisson_log_lpmf(y1[g] | beta_0 + home + att_h[h[g]] + def_a[a[g]]) + 
                 poisson_log_lpmf(y2[g] | beta_0 + att_a[a[g]] + def_h[h[g]]);
  }
  
  
}

Analisando possíveis problemas

Visualizando os valores estimados